-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[HLSL] Make memory representation of boolean vectors in HLSL, vectors of i32. Add support for boolean swizzling. #123977
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-hlsl @llvm/pr-subscribers-clang Author: Sarah Spall (spall) ChangesMake the memory representation of boolean vectors in HLSL, vectors of i32. Full diff: https://github.com/llvm/llvm-project/pull/123977.diff 6 Files Affected:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 054f8d1eadb8c5..906876f1d878bd 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1986,6 +1986,10 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile,
if (const auto *ClangVecTy = Ty->getAs<VectorType>()) {
// Boolean vectors use `iN` as storage type.
if (ClangVecTy->isExtVectorBoolType()) {
+ if (getLangOpts().HLSL) {
+ llvm::Value *Value = Builder.CreateLoad(Addr, Volatile, "load_boolvec");
+ return EmitFromMemory(Value, Ty);
+ }
llvm::Type *ValTy = ConvertType(Ty);
unsigned ValNumElems =
cast<llvm::FixedVectorType>(ValTy)->getNumElements();
@@ -2064,6 +2068,9 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
if (Ty->isExtVectorBoolType()) {
llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType());
+ if (getLangOpts().HLSL)
+ return Builder.CreateZExt(Value, StoreTy);
+
// Expand to the memory bit width.
unsigned MemNumElems = StoreTy->getPrimitiveSizeInBits();
// <N x i1> --> <P x i1>.
@@ -2081,6 +2088,9 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
if (Ty->isExtVectorBoolType()) {
const auto *RawIntTy = Value->getType();
+ if (getLangOpts().HLSL)
+ return Builder.CreateTrunc(Value, ConvertType(Ty), "loadedv");
+
// Bitcast iP --> <P x i1>.
auto *PaddedVecTy = llvm::FixedVectorType::get(
Builder.getInt1Ty(), RawIntTy->getPrimitiveSizeInBits());
@@ -2343,7 +2353,13 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
if (!ExprVT) {
unsigned InIdx = getAccessedFieldNo(0, Elts);
llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx);
- return RValue::get(Builder.CreateExtractElement(Vec, Elt));
+
+ llvm::Value *Element = Builder.CreateExtractElement(Vec, Elt);
+
+ if (getLangOpts().HLSL && LV.getType()->isBooleanType())
+ Element = Builder.CreateTrunc(Element, ConvertType(LV.getType()));
+
+ return RValue::get(Element);
}
// Always use shuffle vector to try to retain the original program structure
@@ -2354,6 +2370,10 @@ RValue CodeGenFunction::EmitLoadOfExtVectorElementLValue(LValue LV) {
Mask.push_back(getAccessedFieldNo(i, Elts));
Vec = Builder.CreateShuffleVector(Vec, Mask);
+
+ if (getLangOpts().HLSL && LV.getType()->isExtVectorBoolType())
+ Vec = EmitFromMemory(Vec, LV.getType());
+
return RValue::get(Vec);
}
@@ -2407,6 +2427,12 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
// Read/modify/write the vector, inserting the new element.
llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddress(),
Dst.isVolatileQualified());
+ llvm::Type *OldVecTy = Vec->getType();
+ if (getLangOpts().HLSL && Dst.getType()->isExtVectorBoolType())
+
+ Vec =
+ Builder.CreateTrunc(Vec, ConvertType(Dst.getType()), "truncboolv");
+
auto *IRStoreTy = dyn_cast<llvm::IntegerType>(Vec->getType());
if (IRStoreTy) {
auto *IRVecTy = llvm::FixedVectorType::get(
@@ -2420,6 +2446,10 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
// <N x i1> --> <iN>.
Vec = Builder.CreateBitCast(Vec, IRStoreTy);
}
+
+ if (getLangOpts().HLSL && Dst.getType()->isExtVectorBoolType())
+ Vec = Builder.CreateZExt(Vec, OldVecTy);
+
Builder.CreateStore(Vec, Dst.getVectorAddress(),
Dst.isVolatileQualified());
return;
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index 09191a4901f493..778161c6a5818e 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -112,6 +112,12 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
// Check for the boolean vector case.
if (T->isExtVectorBoolType()) {
auto *FixedVT = cast<llvm::FixedVectorType>(R);
+
+ if (Context.getLangOpts().HLSL) {
+ llvm::Type *IRElemTy = ConvertTypeForMem(Context.BoolTy);
+ return llvm::FixedVectorType::get(IRElemTy, FixedVT->getNumElements());
+ }
+
// Pad to at least one byte.
uint64_t BytePadded = std::max<uint64_t>(FixedVT->getNumElements(), 8);
return llvm::IntegerType::get(FixedVT->getContext(), BytePadded);
diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp
index d130e8b86bc56d..dcf7feab752b5e 100644
--- a/clang/lib/Sema/SemaExprMember.cpp
+++ b/clang/lib/Sema/SemaExprMember.cpp
@@ -1697,7 +1697,7 @@ static ExprResult LookupMemberExpr(Sema &S, LookupResult &R,
QualType(), false);
}
- if (BaseType->isExtVectorBoolType()) {
+ if (BaseType->isExtVectorBoolType() && !S.Context.getLangOpts().HLSL) {
// We disallow element access for ext_vector_type bool. There is no way to
// materialize a reference to a vector element as a pointer (each element is
// one bit in the vector).
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
index 1665a0260ab054..6770efefe94feb 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
@@ -91,13 +91,12 @@ void l4_to_i2() {
// CHECK-LABEL: i2_to_b2
// CHECK: [[l2:%.*]] = alloca <2 x i32>
-// CHECK: [[b2:%.*]] = alloca i8
+// CHECK: [[b2:%.*]] = alloca <2 x i32>
// CHECK: store <2 x i32> splat (i32 8), ptr [[i2]]
// CHECK: [[veci2:%.*]] = load <2 x i32>, ptr [[i2]]
// CHECK: [[vecb2:%.*]] = icmp ne <2 x i32> [[veci2]], zeroinitializer
-// CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecb2]], <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-// CHECK: [[i8:%.*]] = bitcast <8 x i1> [[vecb8]] to i8
-// CHECK: store i8 [[i8]], ptr [[b2]]
+// CHECK: [[vecb8:%.*]] = zext <2 x i1> [[vecb2]] to <2 x i32>
+// CHECK: store <2 x i32> [[vecb8]], ptr [[b2]]
void i2_to_b2() {
vector<int, 2> i2 = 8;
vector<bool, 2> b2 = i2;
@@ -105,14 +104,13 @@ void i2_to_b2() {
// CHECK-LABEL: d4_to_b2
// CHECK: [[d4:%.*]] = alloca <4 x double>
-// CHECK: [[b2:%.*]] = alloca i8
+// CHECK: [[b2:%.*]] = alloca <2 x i32>
// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]]
// CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
// CHECK: [[vecb4:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <4 x double> [[vecd4]], zeroinitializer
// CHECK: [[vecd2:%.*]] = shufflevector <4 x i1> [[vecb4]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
-// CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecd2]], <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-// CHECK: [[i8:%.*]] = bitcast <8 x i1> [[vecb8]] to i8
-// CHECK: store i8 [[i8]], ptr [[b2]]
+// CHECK: [[vecb8:%.*]] = zext <2 x i1> [[vecd2]] to <2 x i32>
+// CHECK: store <2 x i32> [[vecb8]], ptr [[b2]]
void d4_to_b2() {
vector<double,4> d4 = 9.0;
vector<bool, 2> b2 = d4;
diff --git a/clang/test/CodeGenHLSL/BoolVector.hlsl b/clang/test/CodeGenHLSL/BoolVector.hlsl
new file mode 100644
index 00000000000000..16aa6c0e56cee4
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BoolVector.hlsl
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// CHECK-LABEL: fn
+// CHECK: [[B:%.*]] = alloca <2 x i32>, align 1
+// CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[B]], align 1
+// CHECK-NEXT: [[BoolVec:%.*]] = load <2 x i32>, ptr [[B]], align 1
+// CHECK-NEXT: [[L:%.*]] = trunc <2 x i32> [[BoolVec:%.*]] to <2 x i1>
+// CHECK-NEXT: [[VecExt:%.*]] = extractelement <2 x i1> [[L]], i32 0
+// CHECK-NEXT: ret i1 [[VecExt]]
+bool fn() {
+ bool2 B = {true,true};
+ return B[0];
+}
+
+// CHECK-LABEL: fn2
+// CHECK: [[VAddr:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[A:%.*]] = alloca <2 x i32>, align 1
+// CHECK-NEXT: [[StoreV:%.*]] = zext i1 {{.*}} to i32
+// CHECK-NEXT: store i32 [[StoreV]], ptr [[VAddr]], align 4
+// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[VAddr]], align 4
+// CHECK-NEXT: [[LoadV:%.*]] = trunc i32 [[L]] to i1
+// CHECK-NEXT: [[Vec:%.*]] = insertelement <2 x i1> poison, i1 [[LoadV]], i32 0
+// CHECK-NEXT: [[Vec1:%.*]] = insertelement <2 x i1> [[Vec]], i1 true, i32 1
+// CHECK-NEXT: [[Z:%.*]] = zext <2 x i1> [[Vec1]] to <2 x i32>
+// CHECK-NEXT: store <2 x i32> [[Z]], ptr [[A]], align 1
+// CHECK-NEXT: [[LoadBV:%.*]] = load <2 x i32>, ptr [[A]], align 1
+// CHECK-NEXT: [[LoadV2:%.*]] = trunc <2 x i32> [[LoadBV]] to <2 x i1>
+// CHECK-NEXT: ret <2 x i1> [[LoadV2]]
+bool2 fn2(bool V) {
+ bool2 A = {V,true};
+ return A;
+}
diff --git a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
index 97711c9ee25a10..8e246959df94fe 100644
--- a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
@@ -11,13 +11,23 @@ int2 ToTwoInts(int V){
}
// CHECK-LABEL: ToFourFloats
-// [[splat:%.*]] = insertelement <1 x float> poison, float {{.*}}, i64 0
-// [[vec4:%.*]] = shufflevector <1 x float> [[splat]], <1 x float> poison, <4 x i32> zeroinitializer
+// CHECK: [[splat:%.*]] = insertelement <1 x float> poison, float {{.*}}, i64 0
+// CHECK: [[vec4:%.*]] = shufflevector <1 x float> [[splat]], <1 x float> poison, <4 x i32> zeroinitializer
// ret <4 x float> [[vec4]]
float4 ToFourFloats(float V){
return V.rrrr;
}
+// CHECK-LABEL: ToFourBools
+// CHECK: {{%.*}} = zext i1 {{.*}} to i32
+// CHECK: [[splat:%.*]] = insertelement <1 x i32> poison, i32 {{.*}}, i64 0
+// CHECK-NEXT: [[vec4:%.*]] = shufflevector <1 x i32> [[splat]], <1 x i32> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[vec2Ret:%.*]] = trunc <4 x i32> [[vec4]] to <4 x i1>
+// CHECK-NEXT: ret <4 x i1> [[vec2Ret]]
+bool4 ToFourBools(bool V) {
+ return V.rrrr;
+}
+
// CHECK-LABEL: FillOne
// CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4
// CHECK: store <1 x i32> splat (i32 1), ptr [[vec1Ptr]], align 4
@@ -93,6 +103,17 @@ vector<float, 1> FillOneHalfFloat(){
return .5f.r;
}
+// CHECK-LABEL: FillTrue
+// CHECK: [[Tmp:%.*]] = alloca <1 x i32>, align 1
+// CHECK-NEXT: store <1 x i1> splat (i1 true), ptr [[Tmp]], align 1
+// CHECK-NEXT: [[Vec1:%.*]] = load <1 x i32>, ptr [[Tmp]], align 1
+// CHECK-NEXT: [[Vec2:%.*]] = shufflevector <1 x i32> [[Vec1]], <1 x i32> poison, <2 x i32> zeroinitializer
+// CHECK-NEXT: [[Vec2Ret:%.*]] = trunc <2 x i32> [[Vec2]] to <2 x i1>
+// CHECK-NEXT: ret <2 x i1> [[Vec2Ret]]
+bool2 FillTrue() {
+ return true.xx;
+}
+
// The initial codegen for this case is correct but a bit odd. The IR optimizer
// cleans this up very nicely.
@@ -110,6 +131,24 @@ float2 HowManyFloats(float V) {
return V.rr.rr;
}
+// CHECK-LABEL: HowManyBools
+// CHECK: [[VAddr:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[Vec2Ptr:%.*]] = alloca <2 x i32>, align 1
+// CHECK-NEXT: [[Tmp:%.*]] = zext i1 {{.*}} to i32
+// CHECK-NEXT: store i32 [[Tmp]], ptr [[VAddr]], align 4
+// CHECK-NEXT: [[VVal:%.*]] = load i32, ptr [[VAddr]], align 4
+// CHECK-NEXT: [[Splat:%.*]] = insertelement <1 x i32> poison, i32 [[VVal]], i64 0
+// CHECK-NEXT: [[Vec2:%.*]] = shufflevector <1 x i32> [[Splat]], <1 x i32> poison, <2 x i32> zeroinitializer
+// CHECK-NEXT: [[Trunc:%.*]] = trunc <2 x i32> [[Vec2]] to <2 x i1>
+// CHECK-NEXT: store <2 x i1> [[Trunc]], ptr [[Vec2Ptr]], align 1
+// CHECK-NEXT: [[V2:%.*]] = load <2 x i32>, ptr [[Vec2Ptr]], align 1
+// CHECK-NEXT: [[V3:%.*]] = shufflevector <2 x i32> [[V2]], <2 x i32> poison, <2 x i32> zeroinitializer
+// CHECK-NEXT: [[LV1:%.*]] = trunc <2 x i32> [[V3]] to <2 x i1>
+// CHECK-NEXT: ret <2 x i1> [[LV1]]
+bool2 HowManyBools(bool V) {
+ return V.rr.rr;
+}
+
// This codegen is gnarly because `1.l` is a double, so this creates double
// vectors that need to be truncated down to floats. The optimizer cleans this
// up nicely too.
@@ -166,3 +205,39 @@ int AssignInt(int V){
X.x = V.x + V.x;
return X;
}
+
+// CHECK-LABEL: AssignBool
+// CHECK: [[VAddr:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[XAddr:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[Zext:%.*]] = zext i1 %V to i32
+// CHECK-NEXT: store i32 [[Zext]], ptr [[VAddr]], align 4
+// CHECK-NEXT: [[X:%.*]] = load i32, ptr [[VAddr]], align 4
+// CHECK-NEXT: [[Splat:%.*]] = insertelement <1 x i32> poison, i32 [[X]], i64 0
+// CHECK-NEXT: [[Y:%.*]] = extractelement <1 x i32> [[Splat]], i32 0
+// CHECK-NEXT: [[Z:%.*]] = trunc i32 [[Y]] to i1
+// CHECK-NEXT: [[A:%.*]] = zext i1 [[Z]] to i32
+// CHECK-NEXT: store i32 [[A]], ptr [[XAddr]], align 4
+// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[VAddr]], align 4
+// CHECK-NEXT: [[Splat2:%.*]] = insertelement <1 x i32> poison, i32 [[B]], i64 0
+// CHECK-NEXT: [[C:%.*]] = extractelement <1 x i32> [[Splat2]], i32 0
+// CHECK-NEXT: [[D:%.*]] = trunc i32 [[C]] to i1
+// CHECK-NEXT: br i1 [[D]], label %lor.end, label %lor.rhs
+
+// CHECK: lor.rhs:
+// CHECK-NEXT: [[E:%.*]] = load i32, ptr [[VAddr]], align 4
+// CHECK-NEXT: [[Splat3:%.*]] = insertelement <1 x i32> poison, i32 [[E]], i64 0
+// CHECK-NEXT: [[F:%.*]] = extractelement <1 x i32> [[Splat3]], i32 0
+// CHECK-NEXT: [[G:%.*]] = trunc i32 [[F]] to i1
+// CHECK-NEXT: br label %lor.end
+
+// CHECK: lor.end:
+// CHECK-NEXT: [[H:%.*]] = phi i1 [ true, %entry ], [ [[G]], %lor.rhs ]
+// CHECK-NEXT: store i1 [[H]], ptr [[XAddr]], align 4
+// CHECK-NEXT: [[I:%.*]] = load i32, ptr [[XAddr]], align 4
+// CHECK-NEXT: [[LoadV:%.*]] = trunc i32 [[I]] to i1
+// CHECK-NEXT: ret i1 [[LoadV]]
+bool AssignBool(bool V) {
+ bool X = V.x;
+ X.x = V.x || V.x;
+ return X;
+}
|
Could you please also add a test with bool vector is in a struct? For example in this case we need to make sure the shape of S is |
…struct is a vector of i32. make sure constant struct has vector of i32 make clang format happy
clang/lib/CodeGen/CGExprConstant.cpp
Outdated
@@ -1984,6 +1984,15 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM, | |||
return Res; | |||
} | |||
|
|||
// In HLSL bool vectors are stored in memory as a vector of i32 | |||
if (destType->isExtVectorBoolType() && CGM.getContext().getLangOpts().HLSL) { | |||
llvm::Type *boolVecTy = CGM.getTypes().ConvertTypeForMem(destType); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need special handling for non-hlsl ext-bool-vector types?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't believe so. This code is necessary for HLSL because the constant 'C' is a <2 x i1> but HLSL needs it to be a <2 x i32> hence the zero extension. For non HLSL vectors the expected form would be <2 x i1> so nothing needs to be done.
https://hlsl.godbolt.org/z/nsb6jd1vn
clang/lib/AST/ASTContext.cpp
Outdated
@@ -2016,8 +2016,9 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { | |||
case Type::Vector: { | |||
const auto *VT = cast<VectorType>(T); | |||
TypeInfo EltInfo = getTypeInfo(VT->getElementType()); | |||
Width = VT->isExtVectorBoolType() ? VT->getNumElements() | |||
: EltInfo.Width * VT->getNumElements(); | |||
Width = (VT->isExtVectorBoolType() && !getLangOpts().HLSL) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we should add VectorType::isPackedBoolType() or something like that? Then we can refactor the code so it doesn't explicitly check for HLSL all over the place.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will investigate doing this because I also don't like all the special case checking for HLSL.
…ector and not hlsl
@@ -2064,6 +2064,9 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) { | |||
|
|||
if (Ty->isExtVectorBoolType()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you replaced a bunch of isExtVectorBoolType()
with isPackedVectorBoolType
. We are only doing HLSL modifications on the isExtVectorBoolType()
. But isExtVectorBoolType
doesn't mean the vector is not packed. Is the Zero extend and truncation to get them into a form that they will unpack?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My intention was to replace calls to 'isExtVectorBoolType' with 'isPackedVectorBoolType' anywhere we want an hlsl boolean vector to follow the normal handling path for vectors; Hopefully reviews will verify I got this right.
Here we can't follow the normal vector path because it returns the value unchanged, and we need to convert a vec of i1s to a vec of i32s, which is why we zero extend here. The normally "boolean vector packing" does something different.
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I commented on the first two that I saw, but I think there are a lot of places left where you're conditionalizing the CodeGen behavior on HLSL, that we could instead make the condition based on the types involved requiring conversions.
@@ -410,6 +410,12 @@ VectorType::VectorType(TypeClass tc, QualType vecType, unsigned nElements, | |||
VectorTypeBits.NumElements = nElements; | |||
} | |||
|
|||
bool Type::isPackedVectorBoolType(const ASTContext &ctx) const { | |||
if (ctx.getLangOpts().HLSL) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This might be a dumb suggestion, but is the a way to just check if the mem reprsentation is i32 or i1? HLSL is probably the only language mode that needs this distinction but it feel like this shouldn't have a lang opt toggle based on the function name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code to state that a bool vector should be a vector of i32s isn't accessible here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would make for slightly better abstraction to add a hasPackedBoolVectors()
accessor to LangOptions
that returns !HLSL
, similar to how allowArrayReturnTypes()
works.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good. A question and a suggestion for slightly more consistent abstraction inline.
@@ -410,6 +410,12 @@ VectorType::VectorType(TypeClass tc, QualType vecType, unsigned nElements, | |||
VectorTypeBits.NumElements = nElements; | |||
} | |||
|
|||
bool Type::isPackedVectorBoolType(const ASTContext &ctx) const { | |||
if (ctx.getLangOpts().HLSL) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would make for slightly better abstraction to add a hasPackedBoolVectors()
accessor to LangOptions
that returns !HLSL
, similar to how allowArrayReturnTypes()
works.
if (LV.getType()->isExtVectorBoolType()) | ||
Vec = Builder.CreateTrunc(Vec, ConvertType(LV.getType()), "truncv"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't this have a check for bitsize to avoid a no-op trunc, like we do elsewhere in this patch?
@@ -4701,9 +4737,13 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) { | |||
|
|||
// Store the vector to memory (because LValue wants an address). | |||
Address VecMem = CreateMemTemp(E->getBase()->getType()); | |||
// need to zero extend an hlsl boolean vector to store it back to memory |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably better to phrase this comment in terms of packed/non-packed boolean vectors given the abstraction in LangOpts
@@ -1978,7 +1978,10 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule &CGM, | |||
} | |||
|
|||
// Zero-extend bool. | |||
if (C->getType()->isIntegerTy(1) && !destType->isBitIntType()) { | |||
// In HLSL bool vectors are stored in memory as a vector of i32 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same here
@@ -112,6 +112,12 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { | |||
// Check for the boolean vector case. | |||
if (T->isExtVectorBoolType()) { | |||
auto *FixedVT = cast<llvm::FixedVectorType>(R); | |||
|
|||
if (Context.getLangOpts().HLSL) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could use the hasPackedBoolVectors
accessor I suggested elsewhere.
Make the memory representation of boolean vectors in HLSL, vectors of i32.
Allow boolean swizzling for boolean vectors in HLSL.
Add tests for boolean vectors and boolean vector swizzling.
Closes #91639